Process the OT Data from Kris Bedka
Contents
Process the OT Data from Kris Bedka
Imports
import xarray as xr
from distributed import LocalCluster, Client
import glob
import hvplot.xarray
import pyart
import matplotlib.pyplot as plt
import numpy as np
from datetime import datetime
import pandas as pd
import dask
import warnings
warnings.filterwarnings('ignore')
Set which dates to read in
# Days to process
dates = [
    datetime(2018, 11, 29),
    datetime(2018, 12, 5),
]

# One glob wildcard per day, matching any file name containing YYYYMMDD
search_patterns = [f"*{day:%Y%m%d}*" for day in dates]
# Start a Dask distributed client. Called with no arguments; the captured
# output below reports a distributed.LocalCluster with 5 workers.
from distributed import Client
client = Client()
# Bare expression so the notebook displays the client/cluster summary
client
2023-03-21 13:35:00,067 - distributed.diskutils - INFO - Found stale lock file and directory '/var/folders/bw/c9j8z20x45s2y20vv6528qjc0000gq/T/dask-worker-space/worker-ft35qliy', purging
2023-03-21 13:35:00,068 - distributed.diskutils - INFO - Found stale lock file and directory '/var/folders/bw/c9j8z20x45s2y20vv6528qjc0000gq/T/dask-worker-space/worker-0twwdxs5', purging
2023-03-21 13:35:00,068 - distributed.diskutils - INFO - Found stale lock file and directory '/var/folders/bw/c9j8z20x45s2y20vv6528qjc0000gq/T/dask-worker-space/worker-78ob8fo_', purging
2023-03-21 13:35:00,069 - distributed.diskutils - INFO - Found stale lock file and directory '/var/folders/bw/c9j8z20x45s2y20vv6528qjc0000gq/T/dask-worker-space/worker-aahe44me', purging
2023-03-21 13:35:00,069 - distributed.diskutils - INFO - Found stale lock file and directory '/var/folders/bw/c9j8z20x45s2y20vv6528qjc0000gq/T/dask-worker-space/worker-p9bsuwjm', purging
Client
Client-15d50faa-c817-11ed-963e-520a01803a93
| Connection method: Cluster object | Cluster type: distributed.LocalCluster |
| Dashboard: http://127.0.0.1:8787/status |
Cluster Info
LocalCluster
9c861d12
| Dashboard: http://127.0.0.1:8787/status | Workers: 5 |
| Total threads: 10 | Total memory: 32.00 GiB |
| Status: running | Using processes: True |
Scheduler Info
Scheduler
Scheduler-3c31971f-1d07-4096-8270-ba74f1eeb4aa
| Comm: tcp://127.0.0.1:50654 | Workers: 5 |
| Dashboard: http://127.0.0.1:8787/status | Total threads: 10 |
| Started: Just now | Total memory: 32.00 GiB |
Workers
Worker: 0
| Comm: tcp://127.0.0.1:50668 | Total threads: 2 |
| Dashboard: http://127.0.0.1:50672/status | Memory: 6.40 GiB |
| Nanny: tcp://127.0.0.1:50657 | |
| Local directory: /var/folders/bw/c9j8z20x45s2y20vv6528qjc0000gq/T/dask-worker-space/worker-80zspjmh | |
Worker: 1
| Comm: tcp://127.0.0.1:50669 | Total threads: 2 |
| Dashboard: http://127.0.0.1:50674/status | Memory: 6.40 GiB |
| Nanny: tcp://127.0.0.1:50658 | |
| Local directory: /var/folders/bw/c9j8z20x45s2y20vv6528qjc0000gq/T/dask-worker-space/worker-rfedft_r | |
Worker: 2
| Comm: tcp://127.0.0.1:50670 | Total threads: 2 |
| Dashboard: http://127.0.0.1:50675/status | Memory: 6.40 GiB |
| Nanny: tcp://127.0.0.1:50659 | |
| Local directory: /var/folders/bw/c9j8z20x45s2y20vv6528qjc0000gq/T/dask-worker-space/worker-_8q9e1le | |
Worker: 3
| Comm: tcp://127.0.0.1:50667 | Total threads: 2 |
| Dashboard: http://127.0.0.1:50676/status | Memory: 6.40 GiB |
| Nanny: tcp://127.0.0.1:50660 | |
| Local directory: /var/folders/bw/c9j8z20x45s2y20vv6528qjc0000gq/T/dask-worker-space/worker-53pcnx0e | |
Worker: 4
| Comm: tcp://127.0.0.1:50671 | Total threads: 2 |
| Dashboard: http://127.0.0.1:50673/status | Memory: 6.40 GiB |
| Nanny: tcp://127.0.0.1:50661 | |
| Local directory: /var/folders/bw/c9j8z20x45s2y20vv6528qjc0000gq/T/dask-worker-space/worker-ih9z81kv | |
# Convert each day's raw OT files into one Zarr store per day.
# NOTE(review): the [1:] slice skips the first entry of `search_patterns`;
# presumably that day was already converted -- confirm before re-running.
for search_pattern in search_patterns[1:]:
    files = sorted(glob.glob(f"../../data/relampago-raw-ot-data/{search_pattern}"))
    if not files:
        # open_mfdataset raises on an empty file list; report and move on
        # so one missing day does not abort the remaining days.
        print("No files found for: ", search_pattern)
        continue

    ds = xr.open_mfdataset(files,
                           chunks={'time': 120},
                           concat_dim='time',
                           combine='nested')

    # Add in the parallax correction: each correction variable is overwritten
    # with (correction + original latitude/longitude).
    ds["parallax_correction_latitude"] = ds.parallax_correction_latitude + ds.latitude
    ds["parallax_correction_longitude"] = ds.parallax_correction_longitude + ds.longitude
    ds = ds.set_coords(['parallax_correction_longitude',
                        'parallax_correction_latitude'])

    # Create an output file name from the first time in the dataset
    file_label = pd.to_datetime(ds.time.values[0]).strftime("ot_output_%Y%m%d.zarr")

    try:
        # Rechunk to 120 time steps per chunk before writing (intended to
        # keep the times in a single chunk).
        ds.chunk({'time': 120}).to_zarr(f"../../data/cot-zarr/{file_label}")
    except Exception as err:
        # Best effort: a failed write (e.g. the store already exists) skips
        # this day but is reported instead of being silently swallowed.
        # print() is used because warnings are globally filtered in this file.
        print("Skipping: ", file_label, "-", repr(err))
        continue

    print("Done with: ", file_label)
Done with: ot_output_20181205.zarr
# Day (YYYYMMDD) whose converted Zarr store is loaded for plotting
day = '20181205'

# Open the store and expose the parallax-corrected coordinates under the
# short names used as plot axes below.
ot_ds = xr.open_zarr(f"../../data/cot-zarr/ot_output_{day}.zarr").rename(
    {
        'parallax_correction_latitude': 'lat',
        'parallax_correction_longitude': 'lon',
    }
)

# Label the time coordinate
ot_ds["time"].attrs["long_name"] = 'time'
# Rasterized quadmesh of OT probability on lon/lat, grouped by time;
# the color scale is limited to 0.5-1.
ot_prob = ot_ds["ot_probability"].hvplot.quadmesh(
    x='lon',
    y='lat',
    groupby='time',
    rasterize=True,
    cmap='Spectral_r',
    clim=(.5, 1),
    clabel='OT Probability',
    height=600,
)
# Rasterized quadmesh of IR brightness temperature on lon/lat, grouped by
# time; the color scale is limited to 190-290.
ir_bt = ot_ds["ir_brightness_temperature"].hvplot.quadmesh(
    x='lon',
    y='lat',
    groupby='time',
    rasterize=True,
    cmap='Spectral_r',
    clim=(190, 290),
    clabel='Channel 13 IR Brightness Temperature (K)',
    height=600,
)
# Maximum OT probability over the nlines/npixels dimensions, plotted
# against the remaining dimension(s).
max_ot_prob = ot_ds["ot_probability"].max(dim=['nlines', 'npixels'])
max_ot_prob.plot()
/Users/mgrover/miniforge3/envs/pyart-dev/lib/python3.10/site-packages/dask/array/reductions.py:640: RuntimeWarning: All-NaN slice encountered
return np.nanmax(x_chunk, axis=axis, keepdims=keepdims)
[<matplotlib.lines.Line2D at 0x2cbec75e0>]
# Combine the two maps and arrange them in a single column
stacked_maps = ot_prob + ir_bt
stacked_maps.cols(1)